
from lxml import etree
import requests
# Request headers sent with the page fetch.
# NOTE(review): the value begins with a literal "User-Agent, " prefix, which
# looks like a copy-paste artifact. It is kept byte-for-byte so the request the
# server sees does not change -- confirm before cleaning it up.
headers = {
    "User-Agent": "User-Agent, Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+"
}

# Douban "now playing" cinema listing for Wuhan.
url = "https://movie.douban.com/cinema/nowplaying/wuhan/"

# Without a timeout requests waits indefinitely on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
response.raise_for_status()

# response.content is the raw, undecoded bytes; response.text is the decoded
# str, which is what gets parsed below.
text = response.text

# Parse the page and locate the first <ul class="lists"> element.
html = etree.HTML(text)
list_nodes = html.xpath("//ul[@class='lists']") if html is not None else []
if not list_nodes:
    # Previously this surfaced as a bare "IndexError: list index out of range".
    raise RuntimeError(
        "no <ul class='lists'> element found at %s; the page layout may have "
        "changed or the request may have been blocked" % url
    )
uls = list_nodes[0]
lis = uls.xpath("./li")

# Each <li> carries the movie metadata in its data-* attributes.
movies = []
for li in lis:
    # Element.get() falls back to "" when an attribute is absent, so a single
    # incomplete entry no longer aborts the whole scrape with an IndexError.
    title = li.get("data-title", "")
    score = li.get("data-score", "")
    duration = li.get("data-duration", "")
    region = li.get("data-region", "")
    director = li.get("data-director", "")
    actors = li.get("data-actors", "")

    # The poster URL is the src of the first <img> below the <li>, if any.
    thumbnails = li.xpath(".//img/@src")
    thumbnail = thumbnails[0] if thumbnails else ""

    movie = {
        "title": title,
        "score": score,
        "duration": duration,
        "region": region,
        "director": director,
        "actors": actors,
        "thumbnail": thumbnail,
    }

    movies.append(movie)
print(movies)